{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# New York Times downloader\n",
    "This is a standalone NY Times downloader that many people have requested.\n",
    "\n",
    "Because NewsAPI cannot return news outside a 6-month window, I have rewritten the download code to work without NewsAPI, so that news can be downloaded from 1990 onwards."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import getpass\n",
    "import json\n",
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import requests"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An API key can be requested from the NY Times developer console:\n",
    "\n",
    "https://developer.nytimes.com/signup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read the key from the environment so it is never committed with the notebook;\n",
    "# fall back to an interactive prompt when NYT_API_KEY is not set.\n",
    "api = os.environ.get(\"NYT_API_KEY\") or getpass.getpass(\"NY Times API key: \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def download_json(year, month, api):\n",
    "    \"\"\"Download one month of the NY Times archive and save it as a JSON file.\n",
    "\n",
    "    The payload is written to ``data/<year>-<MM>.json``; the ``data``\n",
    "    directory is created when it does not exist yet.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    year : int\n",
    "        Year of the archive month.\n",
    "    month : int\n",
    "        Month number; it is zero-padded to two digits in the file name only.\n",
    "    api : str\n",
    "        NY Times API key.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        ``Finished downloading <year>/<month>`` on success, otherwise a\n",
    "        ``Failed downloading <year>/<month>: <reason>`` message, so a bulk\n",
    "        loop keeps going while the failure stays visible.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    requests.RequestException\n",
    "        If the request itself cannot be completed (connection error, timeout).\n",
    "    \"\"\"\n",
    "    # https keeps the API key in the query string off the wire in clear text.\n",
    "    url = \"https://api.nytimes.com/svc/archive/v1/{}/{}.json?api-key={}\"\n",
    "    url = url.format(year, month, api)\n",
    "\n",
    "    file_str = 'data/' + str(year) + '-' + '{:02}'.format(month) + '.json'\n",
    "\n",
    "    # A timeout stops one stalled request from hanging a whole bulk download.\n",
    "    items = requests.get(url, timeout=120)\n",
    "\n",
    "    # An error response may still carry a JSON body; reject it here rather than\n",
    "    # saving it as if it were archive data. Only the status code is reported,\n",
    "    # because the URL contains the API key.\n",
    "    if not items.ok:\n",
    "        return \"Failed downloading {}/{}: HTTP {}\".format(year, month, items.status_code)\n",
    "\n",
    "    try:\n",
    "        data = items.json()\n",
    "\n",
    "        os.makedirs('data', exist_ok=True)\n",
    "        with open(file_str, 'w') as f:\n",
    "            json.dump(data, f)\n",
    "    except (ValueError, OSError) as err:\n",
    "        # ValueError: the body was not valid JSON; OSError: the file could not be written.\n",
    "        return \"Failed downloading {}/{}: {}\".format(year, month, err)\n",
    "\n",
    "    return \"Finished downloading {}/{}\".format(year, month)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Finished downloading 2018/2'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch a single month (February 2018) before attempting the bulk download below.\n",
    "download_json(year=2018, month=2, api=api)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uncomment to download every month from 1990 through 2018\n",
    "#for i in range(1990, 2019):\n",
    "#    for j in range(1, 13):\n",
    "#        download_json(i, j, api)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def open_json(year, month, sep=''):\n",
    "    \"\"\"Load one saved archive month into a DataFrame of headlines per day.\n",
    "\n",
    "    Reads ``data/<year>-<MM>.json`` and joins the main headline of every\n",
    "    article published on the same day into a single string.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    year : int\n",
    "        Year of the saved archive month.\n",
    "    month : int\n",
    "        Month number of the saved archive month.\n",
    "    sep : str, optional\n",
    "        Separator placed between headlines of the same day. The default\n",
    "        (empty string) keeps the original behaviour of running the headlines\n",
    "        together; pass e.g. ``' | '`` to keep them apart.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per publication date (datetime index, ascending) with a\n",
    "        single ``News`` column. Missing or empty headlines are skipped.\n",
    "    \"\"\"\n",
    "    file_str = 'data/' + str(year) + '-' + '{:02}'.format(month) + '.json'\n",
    "    with open(file_str) as data_file:\n",
    "        NYTimes_data = json.load(data_file)\n",
    "\n",
    "    # Collect headlines per day in plain Python first; growing a DataFrame row\n",
    "    # by row is quadratic, and the removed ``DataFrame.ix`` indexer is not needed.\n",
    "    headlines_by_day = {}\n",
    "    for doc in NYTimes_data[\"response\"][\"docs\"]:\n",
    "        day = doc[\"pub_date\"][:10]  # leading 'YYYY-MM-DD' part of the timestamp\n",
    "        headlines_by_day.setdefault(day, []).append(doc['headline']['main'])\n",
    "\n",
    "    days = list(headlines_by_day)\n",
    "    news = [sep.join(title for title in headlines_by_day[day] if title) for day in days]\n",
    "\n",
    "    df = pd.DataFrame({'News': news}, index=pd.to_datetime(days, format='%Y-%m-%d'))\n",
    "\n",
    "    return df.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>News</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2018-05-01</th>\n",
       "      <td>At a New York Gallery, Confronting the Art Wor...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-02</th>\n",
       "      <td>Israel, Mike Pompeo, Liverpool: Your Thursday ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-03</th>\n",
       "      <td>What’s on TV Thursday: ‘The Big Bang Theory’ a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-04</th>\n",
       "      <td>How a California Court Ruling Might Affect Min...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-05</th>\n",
       "      <td>Korean Air Staff in Protest Against ‘Nut Rage’...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-06</th>\n",
       "      <td>Lawsuit Accuses Taekwondo Olympian and His Coa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-07</th>\n",
       "      <td>What Is Wrong With Malaysia?The Fates of Those...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-08</th>\n",
       "      <td>Developers Add a Missing Piece to Their Projec...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-09</th>\n",
       "      <td>Dream Logic: Hiro Murai on the Look of ‘Atlant...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-10</th>\n",
       "      <td>24 Art Exhibitions to View in N.Y.C. This Week...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-11</th>\n",
       "      <td>Nine Teaching Ideas for Using Music to Inspire...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-12</th>\n",
       "      <td>Here Are the Biggest Stories in American Polit...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-13</th>\n",
       "      <td>What the Fastest Growth in the U.S. Means for ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-14</th>\n",
       "      <td>What the Heck Is That?: ERISYour Best Tips for...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-15</th>\n",
       "      <td>Quotation of the Day: Political Coup for Trump...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-16</th>\n",
       "      <td>Counting Years, Shedding SecondsWhat to See in...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-17</th>\n",
       "      <td>Quotation of the Day: Tillerson, in Swipe at T...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-18</th>\n",
       "      <td>New Youth Center at American Indian Museum Foc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-19</th>\n",
       "      <td>Flying With the Enemy: The Warriors Give the R...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-20</th>\n",
       "      <td>Carolyn Dooley, Jonathan ThompsonTracy Dong, J...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-21</th>\n",
       "      <td>Robert Indiana, 89, Who Turned ‘Love’ Into End...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-22</th>\n",
       "      <td>200 Professors Call for Ouster of U.S.C. Presi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-23</th>\n",
       "      <td>Letter of Recommendation: Drinking at LunchIf ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-24</th>\n",
       "      <td>An Unseeded Serena Williams Looms in the Frenc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-25</th>\n",
       "      <td>In Fine Print of $25 Billion Offer, a Bid for ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-26</th>\n",
       "      <td>Makers of ‘Sesame Street’ Sue to Get Raunchy P...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-27</th>\n",
       "      <td>Sara Ganim, Daniel CevallosLisa O’Connell, Mic...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-28</th>\n",
       "      <td>A Colonial Cemetery in a Park, and a Question:...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-29</th>\n",
       "      <td>What’s on TV Tuesday: ‘Arrested Development’ a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-30</th>\n",
       "      <td>What Is Your Relationship With Your Siblings L...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2018-05-31</th>\n",
       "      <td>What’s on TV Thursday: N.B.A. Finals and ‘A Br...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                         News\n",
       "2018-05-01  At a New York Gallery, Confronting the Art Wor...\n",
       "2018-05-02  Israel, Mike Pompeo, Liverpool: Your Thursday ...\n",
       "2018-05-03  What’s on TV Thursday: ‘The Big Bang Theory’ a...\n",
       "2018-05-04  How a California Court Ruling Might Affect Min...\n",
       "2018-05-05  Korean Air Staff in Protest Against ‘Nut Rage’...\n",
       "2018-05-06  Lawsuit Accuses Taekwondo Olympian and His Coa...\n",
       "2018-05-07  What Is Wrong With Malaysia?The Fates of Those...\n",
       "2018-05-08  Developers Add a Missing Piece to Their Projec...\n",
       "2018-05-09  Dream Logic: Hiro Murai on the Look of ‘Atlant...\n",
       "2018-05-10  24 Art Exhibitions to View in N.Y.C. This Week...\n",
       "2018-05-11  Nine Teaching Ideas for Using Music to Inspire...\n",
       "2018-05-12  Here Are the Biggest Stories in American Polit...\n",
       "2018-05-13  What the Fastest Growth in the U.S. Means for ...\n",
       "2018-05-14  What the Heck Is That?: ERISYour Best Tips for...\n",
       "2018-05-15  Quotation of the Day: Political Coup for Trump...\n",
       "2018-05-16  Counting Years, Shedding SecondsWhat to See in...\n",
       "2018-05-17  Quotation of the Day: Tillerson, in Swipe at T...\n",
       "2018-05-18  New Youth Center at American Indian Museum Foc...\n",
       "2018-05-19  Flying With the Enemy: The Warriors Give the R...\n",
       "2018-05-20  Carolyn Dooley, Jonathan ThompsonTracy Dong, J...\n",
       "2018-05-21  Robert Indiana, 89, Who Turned ‘Love’ Into End...\n",
       "2018-05-22  200 Professors Call for Ouster of U.S.C. Presi...\n",
       "2018-05-23  Letter of Recommendation: Drinking at LunchIf ...\n",
       "2018-05-24  An Unseeded Serena Williams Looms in the Frenc...\n",
       "2018-05-25  In Fine Print of $25 Billion Offer, a Bid for ...\n",
       "2018-05-26  Makers of ‘Sesame Street’ Sue to Get Raunchy P...\n",
       "2018-05-27  Sara Ganim, Daniel CevallosLisa O’Connell, Mic...\n",
       "2018-05-28  A Colonial Cemetery in a Park, and a Question:...\n",
       "2018-05-29  What’s on TV Tuesday: ‘Arrested Development’ a...\n",
       "2018-05-30  What Is Your Relationship With Your Siblings L...\n",
       "2018-05-31  What’s on TV Thursday: N.B.A. Finals and ‘A Br..."
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the month stored in data/2018-05.json; the bare name on the last line renders the frame.\n",
    "df = open_json(year=2018, month=5)\n",
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
